Load in libraries
library(tidyverse)
library(here)
library(janitor)
library(tsibble)
library(lubridate)
library(sf)
Load in data sets
waiting_times_raw <- read_csv(here("raw_data/non_covid/monthly_ae_waitingtimes_202206.csv")) %>%
clean_names()
hb <- read_csv(here("clean_data/hb_list_simple.csv"))
hospitals <- read_csv(here("raw_data/general/current-hospital_flagged20211216.csv")) %>%
clean_names() %>%
select(location, location_name)
clean dataset
The following cleaning does the following to the waiting times data
set: - converts month column to year_month dates and extracts years and
months
- joins health board and hospital names to data set
- renames columns to more easily understandable names
- removes columns relating to “episodes”, “qualifier codes” and
“discharges” - adds new value of wait greater than 4 hours to the
data
- replaces all NA values with 0 (this step may be optional)
Please note that 4 hours wait time is the “Target” wait time for the
NHS and is represented by the “wait_lt_4hrs” column values.
# cleaning script
waiting_times <- waiting_times_raw %>%
mutate(date_ym = ym(month), .before = month,
month = month(date_ym, label = TRUE, abbr = FALSE),
year = year(date_ym)) %>%
left_join(hb, c("hbt" = "hb")) %>%
left_join(hospitals, c("treatment_location" = "location")) %>%
rename(total_attendance = number_of_attendances_aggregate,
wait_lt_4hrs = number_meeting_target_aggregate,
wait_gt_8hrs = attendance_greater8hrs,
wait_gt_12hrs = attendance_greater12hrs,
hospital_id = treatment_location,
hospital_name = location_name) %>%
select(date_ym, year, month, hb_name, hospital_id, hospital_name, department_type,
total_attendance, wait_lt_4hrs, wait_gt_8hrs, wait_gt_12hrs) %>%
mutate(wait_gt_4hrs = total_attendance - wait_lt_4hrs, .after = wait_lt_4hrs) %>%
mutate(across(total_attendance:wait_gt_12hrs, .fns = ~coalesce(., 0)))
waiting_times <- read_csv(here("clean_data/wait_times.csv")) %>%
mutate(month = month(date_ym, label = TRUE, abbr = FALSE))
# create table with proportions of wait times used for the analysis
waiting_times_prop <- waiting_times %>%
pivot_longer(contains("wait"), names_to = "wait_category", values_to = "number_of_attendances") %>%
mutate(wait_prop = number_of_attendances / total_attendance,
wait_category = str_replace(wait_category, "wait", "prop")) %>%
select(-number_of_attendances) %>%
pivot_wider(names_from = wait_category, values_from = wait_prop)
waiting_times_prop
Analysis
How does hospital attendances change with time?
# plot all data - attendances per year
waiting_times %>%
group_by(year) %>%
summarise(total_attendance = sum(total_attendance)) %>%
ggplot(aes(x = year, y = total_attendance)) +
geom_line()
# attendances - all data covid seasonal analysis
waiting_times %>%
group_by(year, month) %>%
summarise(total_attendance = sum(total_attendance)) %>%
ggplot(aes(x = month, y = total_attendance, group = year, col = factor(year))) +
geom_line()
# attendances per month - pre-covid seasonal analysis
waiting_times %>%
filter(year < 2020) %>%
group_by(year, month) %>%
summarise(total_attendance = sum(total_attendance)) %>%
ggplot(aes(x = month, y = total_attendance, group = year, col = factor(year))) +
geom_line()
# attendances per month - covid seasonal analysis
waiting_times %>%
filter(year >= 2020) %>%
group_by(year, month) %>%
summarise(total_attendance = sum(total_attendance)) %>%
ggplot(aes(x = month, y = total_attendance, group = year, col = factor(year))) +
geom_line()
health_board_input <- c("Borders", "Lothian", "Ayrshire and Arran", "Dumfries and Galloway")
# health_board_input <- hb$hb_name
paste("Multiple HB:\n", paste(sort(health_board_input), collapse = ", "), sep = "\n")
str_c("Multiple HBs:\n", str_c("Borders", "Lothian", "Dumfries", sep = ",\n"))
# plot all data - attendances per year
if(length(health_board_input) <= 3) {
#first two rows will be from reactive function
p <- waiting_times %>%
filter(hb_name %in% health_board_input) %>%
group_by(date_ym, hb_name) %>%
summarise(total_attendance = sum(total_attendance)) %>%
ggplot(aes(x = date_ym, y = total_attendance, col = hb_name)) +
geom_line()
} else {
if(length(health_board_input) == 14) {
hb_label <- "All Health Boards"
} else {
hb_label <- str_c("Total of Multiple HBs:\n",
str_c(health_board_input, collapse = ",\n"))
}
p <- waiting_times %>%
filter(hb_name %in% health_board_input) %>%
mutate(hb_label = hb_label) %>%
group_by(date_ym) %>%
summarise(total_attendance = sum(total_attendance)) %>%
ggplot(aes(x = date_ym, y = total_attendance, colour = hb_label)) +
geom_line()
}
p +
theme_classic() +
scale_y_continuous(labels = comma,
expand = c(0, 0),
limits = c(0, NA)) +
scale_x_date(date_labels = "%Y",
date_breaks = "1 year") +
labs(title = "Total hospital attendances",
subtitle = "July 2007 to June 2022",
col = "Health Board",
y = "Total attendances") +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.text.align = 0)
How do wait times change per year? (in terms of numbers)
# wait times less than 4 hours
waiting_times %>%
group_by(year) %>%
summarise(wait_lt_4hrs = sum(wait_lt_4hrs)) %>%
ggplot(aes(x = year, y = wait_lt_4hrs)) +
geom_col()
# wait times more than 4 hours
waiting_times %>%
group_by(year) %>%
summarise(wait_gt_4hrs = sum(wait_gt_4hrs)) %>%
ggplot(aes(x = year, y = wait_gt_4hrs)) +
geom_col()
# wait times more than 8 hours
waiting_times %>%
group_by(year) %>%
summarise(wait_gt_8hrs = sum(wait_gt_8hrs)) %>%
ggplot(aes(x = year, y = wait_gt_8hrs)) +
geom_col()
# wait times more than 12 hours
waiting_times %>%
group_by(year) %>%
summarise(wait_gt_12hrs = sum(wait_gt_12hrs)) %>%
ggplot(aes(x = year, y = wait_gt_12hrs)) +
geom_col()
How do wait times change per year? (in terms of proportion)
# wait times less than 4 hours
waiting_times %>%
group_by(year) %>%
summarise(total_attendance = sum(total_attendance),
wait_lt_4hrs = sum(wait_lt_4hrs),
prop_lt_4hrs = wait_lt_4hrs / total_attendance) %>%
ggplot(aes(x = year, y = prop_lt_4hrs)) +
geom_col()
# wait times more than 4 hours
waiting_times %>%
group_by(year) %>%
summarise(total_attendance = sum(total_attendance),
wait_gt_4hrs = sum(wait_gt_4hrs),
prop_gt_4hrs = wait_gt_4hrs / total_attendance) %>%
ggplot(aes(x = year, y = prop_gt_4hrs)) +
geom_col()
# wait times more than 8 hours
waiting_times %>%
group_by(year) %>%
summarise(total_attendance = sum(total_attendance),
wait_gt_8hrs = sum(wait_gt_8hrs),
prop_gt_8hrs = wait_gt_8hrs / total_attendance) %>%
ggplot(aes(x = year, y = prop_gt_8hrs)) +
geom_col()
# wait times more than 12 hours
waiting_times %>%
group_by(year) %>%
summarise(total_attendance = sum(total_attendance),
wait_gt_12hrs = sum(wait_gt_12hrs),
prop_gt_12hrs = wait_gt_12hrs / total_attendance) %>%
ggplot(aes(x = year, y = prop_gt_12hrs)) +
geom_col() +
scale_y_continuous(expand = c(0,0),
labels = scales::percent) +
theme_classic() +
theme(axis.title.x = element_blank()) +
labs(y = "Percentage of attendances greater than 12 hours",
title = "Wait times of >12hours each year")
How do wait times differ between departments? (in terms of
proportions)
# wait times less than 4 hours
waiting_times %>%
group_by(year, department_type) %>%
summarise(total_attendance = sum(total_attendance),
wait_lt_4hrs = sum(wait_lt_4hrs),
prop_lt_4hrs = wait_lt_4hrs / total_attendance) %>%
ggplot(aes(x = year, y = prop_lt_4hrs, fill = department_type)) +
geom_col(position = "dodge")
# wait times more than 4 hours
waiting_times %>%
group_by(year, department_type) %>%
summarise(total_attendance = sum(total_attendance),
wait_gt_4hrs = sum(wait_gt_4hrs),
prop_gt_4hrs = wait_gt_4hrs / total_attendance) %>%
ggplot(aes(x = year, y = prop_gt_4hrs, fill = department_type)) +
geom_col(position = "dodge")
# wait times more than 8 hours
waiting_times %>%
group_by(year, department_type) %>%
summarise(total_attendance = sum(total_attendance),
wait_gt_8hrs = sum(wait_gt_8hrs),
prop_gt_8hrs = wait_gt_8hrs / total_attendance) %>%
ggplot(aes(x = year, y = prop_gt_8hrs, fill = department_type)) +
geom_col(position = "dodge")
# wait times more than 12 hours
waiting_times %>%
group_by(year, department_type) %>%
summarise(total_attendance = sum(total_attendance),
wait_gt_12hrs = sum(wait_gt_12hrs),
prop_gt_12hrs = wait_gt_12hrs / total_attendance) %>%
ggplot(aes(x = year, y = prop_gt_12hrs, fill = department_type)) +
geom_col(position = "dodge")
How do wait times change per year & month? (in terms of
numbers)
# wait times less than 4 hours
waiting_times %>%
group_by(year, month) %>%
summarise(wait_lt_4hrs = sum(wait_lt_4hrs)) %>%
ggplot(aes(x = month, y = wait_lt_4hrs, group = year, col = factor(year))) +
geom_line()
# wait times more than 4 hours
waiting_times %>%
group_by(year, month) %>%
summarise(wait_gt_4hrs = sum(wait_gt_4hrs)) %>%
ggplot(aes(x = month, y = wait_gt_4hrs, group = year, col = factor(year))) +
geom_line()
# wait times more than 8 hours
waiting_times %>%
group_by(year, month) %>%
summarise(wait_gt_8hrs = sum(wait_gt_8hrs)) %>%
ggplot(aes(x = month, y = wait_gt_8hrs, group = year, col = factor(year))) +
geom_line()
# wait times more than 12 hours
waiting_times %>%
group_by(year, month) %>%
summarise(wait_gt_12hrs = sum(wait_gt_12hrs)) %>%
ggplot(aes(x = month, y = wait_gt_12hrs, group = year, col = factor(year))) +
geom_line()
How do wait times vary across health boards?
# in terms of numbers
waiting_times %>%
group_by(date_ym, health_board) %>%
summarise(wait_gt_8hrs = sum(wait_gt_8hrs)) %>%
ggplot(aes(x = date_ym, y = wait_gt_8hrs, col = health_board)) +
geom_line() +
labs(title = "Number of waits > 8 hours for each healthboard")
# in terms of proportion
waiting_times_prop %>%
group_by(date_ym, health_board) %>%
summarise(prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/
sum(total_attendance)) %>%
ggplot(aes(x = date_ym, y = prop_gt_8hrs, col = health_board)) +
geom_line() +
labs(title = "Proportion of waits > 8 hours for each healthboard")
# find top 5 healthboards (in terms of attendances)
top_5_attendances_by_hb <- waiting_times_prop %>%
group_by(health_board) %>%
summarise(total_att = sum(total_attendance)) %>%
slice_max(total_att, n = 5)
# find bottom 5 healthboards (in terms of attendances)
bottom_5_attendances_by_hb <- waiting_times_prop %>%
group_by(health_board) %>%
summarise(total_att = sum(total_attendance)) %>%
slice_min(total_att, n = 5)
# wait times across the top 5 healthboards > 8 hours
waiting_times_prop %>%
filter(health_board %in% top_5_attendances_by_hb$health_board) %>%
group_by(date_ym, health_board) %>%
summarise(prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/
sum(total_attendance)) %>%
ggplot(aes(x = date_ym, y = prop_gt_8hrs, col = health_board)) +
geom_line() +
labs(title = "Proportion of waits > 8 hours for the top 5 most attended healthboards")
# wait times across the bottom 5 healthboards > 8 hours
waiting_times_prop %>%
filter(health_board %in% bottom_5_attendances_by_hb$health_board) %>%
group_by(date_ym, health_board) %>%
summarise(prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/
sum(total_attendance)) %>%
ggplot(aes(x = date_ym, y = prop_gt_8hrs, col = health_board)) +
geom_line() +
labs(title = "Proportion of waits > 8 hours for the bottom 5 most attended healthboards")
# wait times across the top 5 healthboards > 12 hours
waiting_times_prop %>%
filter(health_board %in% top_5_attendances_by_hb$health_board) %>%
group_by(date_ym, health_board) %>%
summarise(prop_gt_12hrs = sum(total_attendance * prop_gt_12hrs)/
sum(total_attendance)) %>%
ggplot(aes(x = date_ym, y = prop_gt_12hrs, col = health_board)) +
geom_line() +
labs(title = "Proportion of waits > 12 hours for the bottom 5 most attended healthboards")
# wait times across the bottom 5 healthboards > 12 hours
waiting_times_prop %>%
filter(health_board %in% bottom_5_attendances_by_hb$health_board) %>%
group_by(date_ym, health_board) %>%
summarise(prop_gt_12hrs = sum(total_attendance * prop_gt_12hrs)/
sum(total_attendance)) %>%
ggplot(aes(x = date_ym, y = prop_gt_12hrs, col = health_board)) +
geom_line() +
labs(title = "Proportion of waits > 12 hours for the bottom 5 most attended healthboards")
# wait times across each hospital in the Borders healthboard
waiting_times_prop %>%
# filter(health_board == "NHS Borders") %>%
group_by(date_ym, hospital_name) %>%
summarise(prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/
sum(total_attendance)) %>%
ggplot(aes(x = date_ym, y = prop_gt_8hrs, col = hospital_name)) +
geom_line() +
labs(title = "Proportion of waits > 8 hours for the bottom 5 most attended healthboards") +
theme(legend.position = "none")
most affected healthboards
# in terms of proportion
top_5_hospital_props <- waiting_times_prop %>%
group_by(hospital_name) %>%
summarise(max_prop = max(prop_gt_12hrs)) %>%
arrange(desc(max_prop)) %>%
slice_max(max_prop, n = 5)
# in terms of numbers
waiting_times %>%
group_by(hospital_name) %>%
summarise(max_prop = max(wait_gt_12hrs)) %>%
arrange(desc(max_prop))
# wait times across each hospital in the Borders healthboard
waiting_times_prop %>%
filter(hospital_name %in% top_5_hospital_props$hospital_name) %>%
group_by(date_ym, hospital_name) %>%
summarise(prop_gt_12hrs = sum(total_attendance * prop_gt_12hrs)/
sum(total_attendance)) %>%
ggplot(aes(x = date_ym, y = prop_gt_12hrs, col = hospital_name)) +
geom_line() +
labs(title = "Proportion of waits > 8 hours for the bottom 5 most attended healthboards")
For the years 2007 to 2019, provide a summary of the effects of wait
times at different times of the year.
waiting_times_summary <- waiting_times_prop %>%
filter(year <= 2019) %>%
mutate(month = factor(month, levels = c("July", "August", "September", "October", "November", "December", "January", "February", "March", "April", "May", "June"))) %>%
group_by(month) %>%
summarise(total_attends = mean(total_attendance),
prop_lt_4hrs = sum(total_attendance * prop_lt_4hrs)/ sum(total_attendance),
prop_gt_4hrs = sum(total_attendance * prop_gt_4hrs)/ sum(total_attendance),
prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/ sum(total_attendance),
prop_gt_12hrs = sum(total_attendance * prop_gt_12hrs)/ sum(total_attendance))
waiting_times_summary %>%
ggplot(aes(x = month, y = total_attends)) +
geom_col() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.x = element_blank()) +
labs(y = "Average monthly attendances",
title = "Average A&E attendances",
subtitle = "From July 2007 to December 2019 ") +
geom_smooth()
waiting_times_summary %>%
ggplot(aes(x = month, y = prop_lt_4hrs, group = 1)) +
geom_point() +
labs(title = "Seasonality of proportion of visits with a wait time < 4 hours",
subtitle = "2007 to 2019") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_smooth()
waiting_times_summary %>%
ggplot(aes(x = month, y = prop_gt_4hrs, group = 1)) +
geom_point() +
labs(title = "Seasonality of proportion of visits with a wait time > 4 hours",
subtitle = "2007 to 2019") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_smooth()
waiting_times_summary %>%
ggplot(aes(x = month, y = prop_gt_8hrs, group = 1)) +
geom_point() +
labs(title = "Seasonality of proportion of visits with a wait time > 8 hours",
subtitle = "2007 to 2019") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_smooth()
waiting_times_summary %>%
ggplot(aes(x = month, y = prop_gt_12hrs, group = 1)) +
geom_point() +
scale_y_continuous(labels = scales::percent) +
labs(title = "Seasonality of proportion of visits with a wait time > 12 hours",
subtitle = "Pre-covid: 2007 to 2019",
y = "Percentage of attendances > 12 hours") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.x = element_blank()) +
geom_smooth(se = FALSE)
The number of A&E attendances decrease during the winter months
however the proportion of wait times greater than 4hours increases
significantly between November and February highlighting that the
complexity of cases in the winter mean that it takes hospitals longer to
discharge patients. Perhaps combining this output with the bed
occupation rates during the winter months could further supplement this
relationship.
waiting_times_prop %>%
# filter(year <= 2019) %>%
mutate(month = factor(month, levels = c("July", "August", "September", "October", "November", "December", "January", "February", "March", "April", "May", "June"))) %>%
group_by(month, health_board) %>%
summarise(total_attends = mean(total_attendance),
prop_lt_4hrs = sum(total_attendance * prop_lt_4hrs)/ sum(total_attendance),
prop_gt_4hrs = sum(total_attendance * prop_gt_4hrs)/ sum(total_attendance),
prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/ sum(total_attendance),
prop_gt_12hrs = sum(total_attendance * prop_gt_12hrs)/ sum(total_attendance)) %>%
ggplot(aes(x = month, y = prop_gt_12hrs, group = health_board, colour = health_board)) +
geom_line() +
labs(title = "Seasonality of proportion of visits with a wait time > 12 hours") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
How do attendances to different specialties change over time?
specialties_raw <- read_csv(here("raw_data/non_covid/inpatient_and_daycase_by_nhs_board_of_treatment_and_specialty.csv")) %>%
clean_names()
specialties_raw
specialties <- specialties_raw %>%
inner_join(hb, "hb") %>%
inner_join(hospitals, "location") %>%
select(-ends_with("qf"), -hb, - specialty) %>%
select(quarter, hb_name, location, location_name, everything()) %>%
mutate(year = as.numeric(str_sub(quarter,1, 4)), .before = quarter) %>%
mutate(is_covid_year = case_when(
year >= 2020 ~ TRUE,
year < 2020 ~ FALSE
))
# mutate(quarter = str_sub(quarter, -2, -1))
specialties
all episodes vs time
specialties %>%
distinct(admission_type)
specialties %>%
group_by(quarter) %>%
filter(admission_type == "All Inpatients and Day cases") %>%
summarise(total_episodes = sum(episodes)) %>%
ggplot(aes(x = quarter, y = total_episodes, group = 1)) +
geom_line() +
geom_point() +
theme_classic() +
scale_y_continuous(labels = scales::comma, expand = c(0,0), limits = c(0,NA)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Total admissions for All Inpatients and Day Cases",
x = "Quarter",
y = "Total Episodes")
All Day Cases
specialties %>%
distinct(admission_type)
specialties %>%
group_by(quarter, admission_type) %>%
filter(admission_type %in% c("All Day cases", "All Inpatients")) %>%
summarise(total_episodes = sum(episodes)) %>%
ggplot(aes(x = quarter, y = total_episodes, colour = admission_type)) +
geom_line(aes(group = admission_type)) +
geom_point() +
theme_classic() +
scale_y_continuous(labels = scales::comma, expand = c(0,0), limits = c(0,NA)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Total admissions for All Day Cases and All Inpatients",
x = "Quarter",
y = "Total Episodes",
col = "Admission Type")
all_inpatient_cats <- c("Elective Inpatients",
"Emergency Inpatients",
"Transfers")
specialties %>%
group_by(quarter, admission_type) %>%
filter(admission_type %in% all_inpatient_cats) %>%
summarise(total_episodes = sum(episodes)) %>%
ggplot(aes(x = quarter, y = total_episodes, colour = admission_type)) +
geom_line(aes(group = admission_type)) +
geom_point() +
theme_classic() +
scale_y_continuous(labels = scales::comma, expand = c(0,0), limits = c(0,NA)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Total admissions for Inpatients",
x = "Quarter",
y = "Total Episodes",
col = "Admission Type")
specialties %>%
distinct(admission_type, specialty_name)
specialties %>%
group_by(quarter, admission_type, specialty_name) %>%
filter(admission_type == "Emergency Inpatients") %>%
summarise(total_episodes = sum(episodes)) %>%
ggplot(aes(x = quarter, y = total_episodes, colour = specialty_name)) +
geom_line(aes(group = specialty_name)) +
geom_point() +
theme_classic() +
scale_y_continuous(labels = scales::comma, expand = c(0,0), limits = c(0,10000)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Total admissions for Emergency Inpatients",
subtitle = "Split by specialty",
x = "Quarter",
y = "Total Episodes",
col = "Admission Type")
Which departments changed the most between Covid and pre-covid
total_admissions <- specialties %>%
group_by(is_covid_year) %>%
filter(admission_type == "All Inpatients and Day cases") %>%
summarise(total_admissions = sum(episodes)) %>%
pull(total_admissions)
change_in_at_specialties <- specialties %>%
filter(admission_type %in% c(all_inpatient_cats, "All Day cases")) %>%
group_by(admission_type, specialty_name, is_covid_year) %>%
summarise(total_episodes = sum(episodes)) %>%
pivot_wider(names_from = is_covid_year, values_from = total_episodes) %>%
rename("covid_year" = "TRUE", "pre_covid_year" = "FALSE") %>%
mutate(pre_covid_year_prop = pre_covid_year / total_admissions[1],
covid_year_prop = covid_year / total_admissions[2],
percentage_change = ((covid_year_prop / pre_covid_year_prop) - 1) * 100) %>%
ungroup()
specialties %>%
filter(admission_type == "All Day cases" & specialty_name == "Cardiothoracic Surgery") %>%
group_by(is_covid_year) %>%
summarise(count = sum(episodes))
# top 5 changes in specialty proportion
change_in_at_specialties %>%
slice_max(percentage_change, n = 5)
# sort by largest proportion, top 5
change_in_at_specialties %>%
slice_max(percentage_change, n = 5) %>%
arrange(desc(covid_year_prop))
change_in_at_specialties %>%
filter(covid_year > 1000) %>%
slice_max(percentage_change, n = 5) %>%
mutate(label = str_c(specialty_name, "\n(", admission_type, ")")) %>%
ggplot(aes(x = reorder(label, sort(percentage_change)),
y = percentage_change)) +
geom_col() +
theme_classic() +
scale_y_continuous(expand = c(0,0)) +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(size = 8)) +
labs(y = "Percentage Increase (%)",
title = "Top 5 increases in hospital admissions (by specialty and admission type)",
subtitle = "More than 1,000 admissions")
change_in_specialties <- specialties %>%
group_by(specialty_name, is_covid_year) %>%
summarise(total_episodes = sum(episodes)) %>%
pivot_wider(names_from = is_covid_year, values_from = total_episodes) %>%
rename("covid_year" = "TRUE", "pre_covid_year" = "FALSE") %>%
mutate(pre_covid_year_prop = pre_covid_year / total_admissions[1],
covid_year_prop = covid_year / total_admissions[2],
percentage_change = ((covid_year_prop / pre_covid_year_prop) - 1) * 100) %>%
ungroup()
change_in_specialties %>%
slice_max(percentage_change, n = 5) %>%
filter(percentage_change > 0) %>%
arrange(desc(percentage_change)) %>%
ggplot(aes(x = reorder(specialty_name, percentage_change, decreasing = TRUE),
y = percentage_change)) +
geom_col() +
theme_classic() +
scale_y_continuous(expand = c(0,0)) +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(size = 8)) +
labs(y = "Percentage Increase (%)",
title = "Top 5 changes in hospital admissions (by specialty) - pre-Covid vs Covid",
subtitle = "More than 1,000 admissions")
wt_with_discharges <- waiting_times_raw %>%
mutate(date_ym = ym(month), .before = month,
month = month(date_ym, label = TRUE, abbr = FALSE),
year = year(date_ym)) %>%
left_join(hb, c("hbt" = "hb")) %>%
left_join(hospitals, c("treatment_location" = "location")) %>%
rename(total_attendance = number_of_attendances_aggregate,
wait_lt_4hrs = number_meeting_target_aggregate,
wait_gt_8hrs = attendance_greater8hrs,
wait_gt_12hrs = attendance_greater12hrs,
health_board = hb_name,
hospital_id = treatment_location,
hospital_name = location_name) %>%
select(-ends_with("qf")) %>%
select(date_ym, year, month, health_board, hospital_id, hospital_name,
everything(), -country, - hbt)
# check if there is a trend across all years
wt_with_discharges %>%
group_by(date_ym, hospital_name) %>%
summarise(transfers = sum(discharge_destination_transfer, na.rm = TRUE)) %>%
ggplot(aes(x = date_ym, y = transfers, col = hospital_name)) +
geom_line() +
theme(legend.position = "none")
# check if there is a seasonal trend
wt_with_discharges %>%
mutate(month = factor(month, levels = c("July", "August", "September", "October", "November", "December", "January", "February", "March", "April", "May", "June"))) %>%
group_by(month, health_board) %>%
summarise(transfers = sum(discharge_destination_other_specialty, na.rm = TRUE)) %>%
ggplot(aes(x = month, y = transfers, group = health_board, col = health_board)) +
geom_line() +
theme(legend.position = "none") +
facet_wrap(~ health_board, scales = "free_y") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Bed occupancy data
beds_raw <- read_csv(here("raw_data/non_covid/beds_by_nhs_board_of_treatment_and_specialty.csv")) %>%
clean_names() %>%
select(-ends_with("qf"))
# all specialty names
beds_raw %>%
group_by(specialty_name) %>%
summarise(sum(all_staffed_beddays))
beds_raw %>%
filter(specialty_name != "All Acute" & specialty_name != "All Specialties") %>%
# group_by(specialty_name) %>%
summarise(sum(all_staffed_beddays))
# 209139660
beds <- beds_raw %>%
left_join(hb, "hb") %>%
left_join(hospitals, "location") %>%
select(quarter, hb, hb_name, location, location_name, specialty_name:percentage_occupancy) %>%
mutate()
beds %>%
group_by(quarter) %>%
summarise(staffed_beds = sum(all_staffed_beddays),
occupied_beds = sum(total_occupied_beddays),
occupation_percentage = occupied_beds / staffed_beds * 100) %>%
ggplot(aes(x = quarter, y = occupation_percentage, group = 1)) +
geom_line()
Wait times - baseline (pre-covid vs covid)
Percentage of wait times within target (<4hrs)
# pre-covid <= 2019, covid => 2020
avg_wt_target_pre_covid <- waiting_times %>%
filter(year <= 2019) %>%
group_by(month) %>%
summarise(total_attendance = sum(total_attendance),
total_wait_lt_4hrs = sum(wait_lt_4hrs),
target_wait_prop = total_wait_lt_4hrs / total_attendance)
avg_wt_target_pre_covid %>%
ggplot(aes(x = month, y = target_wait_prop, group = 1)) +
geom_line() +
geom_smooth(se = 0)
waiting_times %>%
filter(year >= 2020) %>%
group_by(year, month) %>%
summarise(total_attendance = sum(total_attendance),
total_wait_lt_4hrs = sum(wait_lt_4hrs),
target_wait_prop = total_wait_lt_4hrs / total_attendance) %>%
ggplot(aes(x = month, y = target_wait_prop,
group = factor(year), col = factor(year))) +
geom_line() +
geom_point() +
geom_line(data = avg_wt_target_pre_covid, aes(x = month, y = target_wait_prop,
group = 1, colour = "Baseline"),
size = 1) +
geom_point(data = avg_wt_target_pre_covid, aes(x = month, y = target_wait_prop,
group = 1, colour = "Baseline")) +
theme_classic() +
scale_color_manual(name = "Year", values = c("Baseline" = "darkblue",
"2020" = "red",
"2021" = "green",
"2022" = "orange")) +
scale_y_continuous(labels = scales::percent) +
labs(y = "Percentage of admissions (%)",
title = "Proportion of waiting times greater than 12 hours",
subtitle = "Pre-covid (2007-2019) vs covid (2020+)",
col = "Year") +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
Percentage of wait times >8hrs
avg_wt_gt_8hrs_pre_covid <- waiting_times %>%
filter(year <= 2019) %>%
group_by(month) %>%
summarise(total_attendance = sum(total_attendance),
total_wait_gt_8hrs = sum(wait_gt_8hrs),
target_wait_prop = total_wait_gt_8hrs / total_attendance)
avg_wt_gt_8hrs_pre_covid %>%
ggplot(aes(x = month, y = target_wait_prop, group = 1)) +
geom_line() +
geom_smooth(se = 0)
waiting_times %>%
filter(year >= 2020) %>%
group_by(year, month) %>%
summarise(total_attendance = sum(total_attendance),
total_wait_gt_8hrs = sum(wait_gt_8hrs),
target_wait_prop = total_wait_gt_8hrs / total_attendance) %>%
ggplot(aes(x = month, y = target_wait_prop,
group = factor(year), col = factor(year))) +
geom_line() +
geom_point() +
geom_line(data = avg_wt_gt_8hrs_pre_covid, aes(x = month, y = target_wait_prop,
group = 1, colour = "Baseline"),
size = 1) +
geom_point(data = avg_wt_gt_8hrs_pre_covid, aes(x = month, y = target_wait_prop,
group = 1, colour = "Baseline")) +
theme_classic() +
scale_color_manual(name = "Year", values = c("Baseline" = "darkblue", "2020" = "red", "2021" = "green", "2022" = "orange")) +
scale_y_continuous(labels = scales::percent) +
labs(y = "Percentage of admissions (%)",
title = "Proportion of waiting times greater than 8 hours",
subtitle = "Pre-covid (2007-2019) vs covid (2020+)",
col = "Year") +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
Percentage of wait times >12hrs
avg_wt_gt_12hrs_pre_covid <- waiting_times %>%
filter(year <= 2019) %>%
group_by(month) %>%
summarise(total_attendance = sum(total_attendance),
total_wait_gt_12hrs = sum(wait_gt_12hrs),
target_wait_prop = total_wait_gt_12hrs / total_attendance)
avg_wt_gt_12hrs_pre_covid %>%
ggplot(aes(x = month, y = target_wait_prop, group = 1)) +
geom_line() +
geom_smooth(se = 0)
waiting_times %>%
filter(year >= 2020) %>%
group_by(year, month) %>%
summarise(total_attendance = sum(total_attendance),
total_wait_gt_12hrs = sum(wait_gt_12hrs),
target_wait_prop = total_wait_gt_12hrs / total_attendance) %>%
ggplot(aes(x = month, y = target_wait_prop,
group = factor(year), col = factor(year))) +
geom_line() +
geom_point() +
geom_line(data = avg_wt_gt_12hrs_pre_covid, aes(x = month, y = target_wait_prop,
group = 1, colour = "Baseline"),
size = 1) +
geom_point(data = avg_wt_gt_12hrs_pre_covid, aes(x = month, y = target_wait_prop,
group = 1, colour = "Baseline")) +
theme_classic() +
scale_color_manual(name = "Year", values = c("Baseline" = "darkblue",
"2020" = "red",
"2021" = "green",
"2022" = "orange")) +
scale_y_continuous(labels = scales::percent) +
labs(y = "Percentage of admissions (%)",
title = "Proportion of waiting times greater than 12 hours",
subtitle = "Pre-covid (2007-2019) vs covid (2020+)",
col = "Year") +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
Jack’s question:
beds_treatment_specialty <- beds
df1 <- beds_treatment_specialty %>% #pre covid dates, all health baords
mutate(date = yq(quarter), month = month(date, label = TRUE, abbr = TRUE),
year = year(date)) %>%
filter(year <= 2019, hb == "S08000015") %>% # hb selected by user input
group_by(month) %>%
summarise(avg_occupancy = mean(percentage_occupancy, na.rm = TRUE))
df2 <- beds_treatment_specialty %>%
mutate(date = yq(quarter), month = month(date, label = TRUE, abbr = TRUE),
year = year(date)) %>%
filter(year > 2019, hb == "S08000015") %>% #post covid, filter based on map input
group_by(year, month) %>%
summarise(avg_occupancy = mean(percentage_occupancy, na.rm = TRUE))
## no longer required
# df3 <- df1 %>%
# mutate(Type = "baseline") %>%
# bind_rows(df2 %>%
# mutate(Type = "post-covid"))
df2 %>%
ggplot(aes(x = month, y = avg_occupancy, colour = factor(year), group = year)) +
geom_point() +
geom_line() +
geom_line(data = df1, aes(x = month, y = avg_occupancy, group = 1, col = "Baseline")) +
geom_point(data = df1, aes(x = month, y = avg_occupancy, group = 1, col = "Baseline")) +
scale_color_manual(name = "Year", values = c("Baseline" = "darkblue",
"2020" = "red",
"2021" = "green")) +
labs(title = "Avg Occupancy by Health Board vs Baseline", # need to update these
x = "Month",
y = "Avg Occupancy Rate")
df2_wide <- df2 %>%
pivot_wider(names_from = year, values_from = avg_occupancy)
df12 <- df1 %>%
rename(baseline = avg_occupancy) %>%
left_join(df2_wide, "month")
df12
df3 <- beds_treatment_specialty %>% #pre covid dates, all health baords
mutate(date = yq(quarter), month = month(date, label = TRUE, abbr = TRUE),
year = year(date)) %>%
filter(year <= 2019, hb == "S08000015") %>% # hb selected by user input
group_by(month) %>%
summarise(total_available = sum(all_staffed_beddays),
total_occupied = sum(total_occupied_beddays),
avg_occupancy = total_occupied / total_available)
df4 <- beds_treatment_specialty %>%
mutate(date = yq(quarter), month = month(date, label = TRUE, abbr = TRUE),
year = year(date)) %>%
filter(year > 2019, hb == "S08000015") %>% #post covid, filter based on map input
group_by(year, month) %>%
summarise(total_available = sum(all_staffed_beddays),
total_occupied = sum(total_occupied_beddays),
avg_occupancy = total_occupied / total_available)
df4 %>%
ggplot(aes(x = month, y = avg_occupancy, colour = factor(year), group = year)) +
geom_point() +
geom_line() +
geom_line(data = df3, aes(x = month, y = avg_occupancy, group = 1, col = "Baseline")) +
geom_point(data = df3, aes(x = month, y = avg_occupancy, group = 1, col = "Baseline")) +
scale_color_manual(name = "Year", values = c("Baseline" = "darkblue",
"2020" = "red",
"2021" = "green")) +
labs(title = "Avg Occupancy by Health Board vs Baseline", # need to update these
x = "Month",
y = "Avg Occupancy Rate") +
scale_y_continuous(labels = scales::percent)
df4_wide <- df2 %>%
pivot_wider(names_from = year, values_from = avg_occupancy)
df34 <- df3 %>%
rename(baseline = avg_occupancy) %>%
left_join(df2_wide, "month") %>%
select(-total_available, - total_occupied) %>%
mutate(baseline = baseline * 100)
# weighted average occupancy rate
df34
# average of occupancy percentages
df12
how does bed usage change over time
beds %>%
group_by(quarter) %>%
summarise(beds_available = sum(all_staffed_beddays),
beds_occupied = sum(total_occupied_beddays)) %>%
pivot_longer(beds_available:beds_occupied, names_to = "bed_type",
values_to = "num_beds") %>%
ggplot(aes(x = quarter, y = num_beds, colour = bed_type, group = bed_type)) +
geom_line() +
scale_y_continuous(labels = scales::comma) +
theme_classic() +
theme(axis.text = element_text(angle = 45, hjust = 1),
axis.title.x = element_blank()) +
labs(y = "Number of bed days",
title = "Number of beds available/occupied over time",
col = "Bed Type")
bed use by specialty
beds %>%
group_by(quarter, specialty_name) %>%
# filter(specialty_name %in% c("Clinical Radiology", "Intensive Care Medicine")) %>%
summarise(beds_available = sum(all_staffed_beddays),
beds_occupied = sum(total_occupied_beddays)) %>%
pivot_longer(beds_available:beds_occupied, names_to = "bed_type",
values_to = "num_beds") %>%
ggplot(aes(x = quarter, y = num_beds, colour = specialty_name, group = specialty_name)) +
geom_line() +
facet_wrap(~bed_type) +
scale_y_continuous(labels = scales::comma) +
theme_classic() +
theme(axis.text = element_text(angle = 45, hjust = 1),
axis.title.x = element_blank()) +
labs(y = "Number of bed days",
title = "Number of beds available/occupied over time",
col = "Bed Type")
beds %>%
mutate(year = as.numeric(str_extract(quarter, "[0-9]{4}")),
is_covid_year = case_when(
year <= 2019 ~ FALSE,
year >= 2020 ~ TRUE
)) %>%
group_by(specialty_name, is_covid_year) %>%
summarise(beds_available = sum(all_staffed_beddays),
beds_occupied = sum(total_occupied_beddays)) %>%
select(specialty_name, is_covid_year, beds_occupied) %>%
pivot_wider(names_from = is_covid_year, values_from = beds_occupied) %>%
rename("pre_covid" = "FALSE", "covid" = "TRUE") %>%
drop_na() %>%
mutate(diff_occupied = covid - pre_covid,
diff_occupied_pc = ((covid / pre_covid) - 1) * 100) %>%
filter(diff_occupied_pc > 0) %>%
arrange(desc(diff_occupied_pc))
waiting times
cleaning
hb <- read_csv(here("clean_data/hb_list_simple.csv"))
waiting_times <- waiting_times_raw %>%
mutate(date_ym = ym(month), .before = month,
month = month(date_ym, label = TRUE, abbr = FALSE),
year = year(date_ym)) %>%
left_join(hb, c("hbt" = "hb")) %>%
left_join(hospitals, c("treatment_location" = "location")) %>%
rename(total_attendance = number_of_attendances_aggregate,
wait_lt_4hrs = number_meeting_target_aggregate,
wait_gt_8hrs = attendance_greater8hrs,
wait_gt_12hrs = attendance_greater12hrs,
hospital_id = treatment_location,
hospital_name = location_name) %>%
select(date_ym, year, month, hb_name, hospital_id, hospital_name, department_type,
total_attendance, wait_lt_4hrs, wait_gt_8hrs, wait_gt_12hrs) %>%
mutate(wait_gt_4hrs = total_attendance - wait_lt_4hrs, .after = wait_lt_4hrs) %>%
mutate(across(total_attendance:wait_gt_12hrs, .fns = ~coalesce(., 0)))
analysis
waiting_times %>%
mutate(is_covid_year = case_when(
year <= 2019 ~ FALSE,
year >= 2020 ~ TRUE
)) %>%
group_by(is_covid_year) %>%
summarise(sum_attendance = sum(total_attendance),
wait_target = sum(wait_lt_4hrs)) %>%
pivot_longer(wait_target, names_to = "wait_time", values_to = "value") %>%
mutate(proportion = value / sum_attendance) %>%
ggplot(aes(ymax = proportion, ymin = 0, xmax = 2, xmin = 1, fill = proportion)) +
geom_rect(aes(ymax = 1, ymin = 0, xmax = 2, xmin = 1), fill = "grey80") +
geom_rect() +
coord_polar(theta = "y",start=-pi/2) + xlim(c(0, 2)) + ylim(c(0,2)) +
geom_text(aes(x = 0, y = 0, label = scales::percent(proportion, accuracy = 0.1)), size = 6.5) +
geom_text(aes(x = 0.5, y = 1.5), label = c("Pre-Covid", "During Covid"), family="Poppins Light", size=4.2) +
facet_wrap(~is_covid_year, nrow = 1) +
theme_void() +
# scale_fill_manual(values = c("red"="#C9146C", "orange"="#DA9112", "green"="#129188")) +
scale_colour_manual(values = c("red"="#C9146C", "orange"="#DA9112", "green"="#129188")) +
theme(strip.background = element_blank(),
strip.text.x = element_blank(),
legend.position = "none") +
# ,
# +
labs(title = " Percentage of admissions achieving target wait times (<4hrs)")
library(ggforce)
library(scales)
waiting_times %>%
mutate(is_covid_year = case_when(
year <= 2019 ~ FALSE,
year >= 2020 ~ TRUE
)) %>%
group_by(is_covid_year) %>%
summarise(sum_attendance = sum(total_attendance),
wait_target = sum(wait_lt_4hrs)) %>%
pivot_longer(wait_target, names_to = "wait_time", values_to = "value") %>%
mutate(proportion = value / sum_attendance) %>%
mutate(ymin = rescale(0, to = pi*c(-.5,.5), from = 0:1),
ymax = rescale(proportion, to = pi*c(-.5,.5), from = 0:1)) %>%
ggplot(aes(x0 = 0, y0 = 0, r0 = .5, r = 1)) +
geom_arc_bar(aes(start = - pi / 2, end = pi / 2), fill = "grey80") +
geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = .5, r = 1, start = ymin, end = ymax, fill = proportion)) +
coord_fixed() +
facet_wrap(~ is_covid_year) +
ylim(-0.3, 1) +
geom_text(aes(x = 0, y = 0.01, label = scales::percent(proportion, accuracy = 0.1)), size = 6.5) +
geom_text(aes(x = 0, y = -0.25), label = c("Pre-Covid", "During Covid"), family= "Poppins Light", size=4.2) +
theme_void() +
theme(strip.background = element_blank(),
strip.text = element_blank(),
legend.position = "none",
title = element_text(vjust = 1),
plot.margin = unit(c(0, 0, 0, 0), "cm")) +
labs(title = " Percentage of admissions achieving target wait times (<4hrs)\n")
Colour palette
pal <- c(rgb(199, 175, 117, maxColorValue = 255),
rgb(124, 36, 24, maxColorValue = 255),
rgb(210, 221, 213, maxColorValue = 255),
rgb(168, 106, 57, maxColorValue = 255),
rgb(222, 224, 227, maxColorValue = 255),
rgb(186, 158, 53, maxColorValue = 255),
rgb(6, 57, 83, maxColorValue = 255),
rgb(109, 67, 85, maxColorValue = 255)
)
show_col(pal)
Rob’s problem
inpatient_and_daycase_by_nhs_board_of_treatment_and_simd_non_covid_cleaned %>%
select(quarter_year, year, hb_name, simd, admission_type, stays, is_covid_year) %>%
filter(stays >0) %>%
filter(hb_name != “”) %>%
Error: unexpected input in:
" filter(stays >0) %>%
filter(hb_name != “"
health_board_map %>%
plot_geo(locationmode = "Scotland") %>%
add_trace(text = ~hb_name)
No scattergeo mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
No scattergeo mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
---
title: "R Notebook"
output: html_notebook
---

# Load in libraries
```{r}

library(tidyverse)
library(here)
library(janitor)
library(tsibble)
library(lubridate)
library(sf)

```
# Load in data sets
```{r}

waiting_times_raw <- read_csv(here("raw_data/non_covid/monthly_ae_waitingtimes_202206.csv")) %>% 
  clean_names()

hb <- read_csv(here("clean_data/hb_list_simple.csv"))

hospitals <- read_csv(here("raw_data/general/current-hospital_flagged20211216.csv")) %>% 
  clean_names() %>% 
  select(location, location_name)

```

# clean dataset

The following cleaning does the following to the waiting times data set: 
- converts month column to year_month dates and extracts years and months  
- joins health board and hospital names to data set  
- renames columns to more easily understandable names  
- removes columns relating to "episodes", "qualifier codes" and "discharges"
- adds new value of wait greater than 4 hours to the data  
- _replaces all NA values with 0 (this step may be optional)_

Please note that 4 hours wait time is the "Target" wait time for the NHS and is
represented by the "wait_lt_4hrs" column values.
```{r}
# cleaning script
waiting_times <- waiting_times_raw %>% 
  mutate(date_ym = ym(month), .before = month,
         month = month(date_ym, label = TRUE, abbr = FALSE),
         year = year(date_ym)) %>% 
  left_join(hb, c("hbt" = "hb")) %>% 
  left_join(hospitals, c("treatment_location" = "location")) %>% 
  rename(total_attendance = number_of_attendances_aggregate,
         wait_lt_4hrs = number_meeting_target_aggregate,
         wait_gt_8hrs = attendance_greater8hrs,
         wait_gt_12hrs = attendance_greater12hrs,
         hospital_id = treatment_location, 
         hospital_name = location_name) %>%
  select(date_ym, year, month, hb_name, hospital_id, hospital_name, department_type,
         total_attendance, wait_lt_4hrs, wait_gt_8hrs, wait_gt_12hrs) %>% 
  mutate(wait_gt_4hrs = total_attendance - wait_lt_4hrs, .after = wait_lt_4hrs) %>%
  mutate(across(total_attendance:wait_gt_12hrs, .fns = ~coalesce(., 0)))

waiting_times <- read_csv(here("clean_data/wait_times.csv")) %>% 
  mutate(month = month(date_ym, label = TRUE, abbr = FALSE)) 
```

```{r}
# create table with proportions of wait times used for the analysis
waiting_times_prop <- waiting_times %>% 
  pivot_longer(contains("wait"), names_to = "wait_category", values_to = "number_of_attendances") %>% 
  mutate(wait_prop = number_of_attendances / total_attendance,
         wait_category = str_replace(wait_category, "wait", "prop")) %>% 
  select(-number_of_attendances) %>% 
  pivot_wider(names_from = wait_category, values_from = wait_prop)


waiting_times_prop
```

# Analysis

## How does hospital attendances change with time?

```{r}
# plot all data - attendances per year
waiting_times %>% 
  group_by(year) %>% 
  summarise(total_attendance = sum(total_attendance)) %>% 
  ggplot(aes(x = year, y = total_attendance)) +
  geom_line()

# attendances - all data covid seasonal analysis
waiting_times %>% 
  group_by(year, month) %>% 
  summarise(total_attendance = sum(total_attendance)) %>% 
  ggplot(aes(x = month, y = total_attendance, group = year, col = factor(year))) +
  geom_line()

# attendances per month - pre-covid seasonal analysis
waiting_times %>% 
  filter(year < 2020) %>% 
  group_by(year, month) %>% 
  summarise(total_attendance = sum(total_attendance)) %>% 
  ggplot(aes(x = month, y = total_attendance, group = year, col = factor(year))) +
  geom_line()

# attendances per month - covid seasonal analysis
waiting_times %>% 
  filter(year >= 2020) %>% 
  group_by(year, month) %>% 
  summarise(total_attendance = sum(total_attendance)) %>% 
  ggplot(aes(x = month, y = total_attendance, group = year, col = factor(year))) +
  geom_line()

```

```{r}

health_board_input <- c("Borders", "Lothian", "Ayrshire and Arran", "Dumfries and Galloway")

# health_board_input <- hb$hb_name

paste("Multiple HB:\n", paste(sort(health_board_input), collapse = ", "), sep = "\n")

str_c("Multiple HBs:\n", str_c("Borders", "Lothian", "Dumfries", sep = ",\n"))

# plot all data - attendances per year

  
  if(length(health_board_input) <= 3) {
   
    #first two rows will be from reactive function
    p <- waiting_times %>% 
      filter(hb_name %in% health_board_input) %>% 
      group_by(date_ym, hb_name) %>% 
      summarise(total_attendance = sum(total_attendance)) %>% 
      ggplot(aes(x = date_ym, y = total_attendance, col = hb_name)) +
      geom_line()

  } else {
    
    if(length(health_board_input) == 14) {
      hb_label <- "All Health Boards"
    } else {
      hb_label <- str_c("Total of Multiple HBs:\n",
                        str_c(health_board_input, collapse = ",\n"))
    }
 
    p <- waiting_times %>% 
      filter(hb_name %in% health_board_input) %>% 
      mutate(hb_label = hb_label) %>% 
      group_by(date_ym) %>% 
      summarise(total_attendance = sum(total_attendance)) %>% 
      ggplot(aes(x = date_ym, y = total_attendance, colour = hb_label)) +
      geom_line()
    
  }

p  +
  theme_classic() +
  scale_y_continuous(labels = comma, 
                     expand = c(0, 0),
                     limits = c(0, NA)) +
  scale_x_date(date_labels = "%Y",
               date_breaks = "1 year") +
  labs(title = "Total hospital attendances",
       subtitle = "July 2007 to June 2022", 
       col = "Health Board",
       y = "Total attendances") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.text.align = 0)

```


# How do wait times change per year? (in terms of numbers)

```{r}
# wait times less than 4 hours
waiting_times %>% 
  group_by(year) %>% 
  summarise(wait_lt_4hrs = sum(wait_lt_4hrs)) %>% 
  ggplot(aes(x = year, y = wait_lt_4hrs)) +
  geom_col()

# wait times more than 4 hours
waiting_times %>% 
  group_by(year) %>% 
  summarise(wait_gt_4hrs = sum(wait_gt_4hrs)) %>% 
  ggplot(aes(x = year, y = wait_gt_4hrs)) +
  geom_col()

# wait times more than 8 hours
waiting_times %>% 
  group_by(year) %>% 
  summarise(wait_gt_8hrs = sum(wait_gt_8hrs)) %>% 
  ggplot(aes(x = year, y = wait_gt_8hrs)) +
  geom_col()

# wait times more than 12 hours
waiting_times %>% 
  group_by(year) %>% 
  summarise(wait_gt_12hrs = sum(wait_gt_12hrs)) %>% 
  ggplot(aes(x = year, y = wait_gt_12hrs)) +
  geom_col()

```

# How do wait times change per year? (in terms of proportion)

```{r}

# wait times less than 4 hours
waiting_times %>% 
  group_by(year) %>% 
  summarise(total_attendance = sum(total_attendance),
            wait_lt_4hrs = sum(wait_lt_4hrs),
            prop_lt_4hrs = wait_lt_4hrs / total_attendance) %>% 
  ggplot(aes(x = year, y = prop_lt_4hrs)) +
  geom_col()

# wait times more than 4 hours
waiting_times %>% 
  group_by(year) %>% 
  summarise(total_attendance = sum(total_attendance),
            wait_gt_4hrs = sum(wait_gt_4hrs),
            prop_gt_4hrs = wait_gt_4hrs / total_attendance) %>% 
  ggplot(aes(x = year, y = prop_gt_4hrs)) +
  geom_col()

# wait times more than 8 hours
waiting_times %>% 
  group_by(year) %>% 
  summarise(total_attendance = sum(total_attendance),
            wait_gt_8hrs = sum(wait_gt_8hrs),
            prop_gt_8hrs = wait_gt_8hrs / total_attendance) %>% 
  ggplot(aes(x = year, y = prop_gt_8hrs)) +
  geom_col()

# wait times more than 12 hours
waiting_times %>% 
  group_by(year) %>% 
  summarise(total_attendance = sum(total_attendance),
            wait_gt_12hrs = sum(wait_gt_12hrs),
            prop_gt_12hrs = wait_gt_12hrs / total_attendance) %>% 
  ggplot(aes(x = year, y = prop_gt_12hrs)) +
  geom_col() +
  scale_y_continuous(expand = c(0,0),
                     labels = scales::percent) +
  theme_classic() +
  theme(axis.title.x = element_blank()) +
  labs(y = "Percentage of attendances greater than 12 hours",
       title = "Wait times of >12hours each year")

```
# How do wait times differ between departments? (in terms of proportions)

```{r}

# wait times less than 4 hours
waiting_times %>% 
  group_by(year, department_type) %>% 
  summarise(total_attendance = sum(total_attendance),
            wait_lt_4hrs = sum(wait_lt_4hrs),
            prop_lt_4hrs = wait_lt_4hrs / total_attendance) %>% 
  ggplot(aes(x = year, y = prop_lt_4hrs, fill = department_type)) +
  geom_col(position = "dodge")

# wait times more than 4 hours
waiting_times %>% 
  group_by(year, department_type) %>% 
  summarise(total_attendance = sum(total_attendance),
            wait_gt_4hrs = sum(wait_gt_4hrs),
            prop_gt_4hrs = wait_gt_4hrs / total_attendance) %>% 
  ggplot(aes(x = year, y = prop_gt_4hrs, fill = department_type)) +
  geom_col(position = "dodge")

# wait times more than 8 hours
waiting_times %>% 
  group_by(year, department_type) %>% 
  summarise(total_attendance = sum(total_attendance),
            wait_gt_8hrs = sum(wait_gt_8hrs),
            prop_gt_8hrs = wait_gt_8hrs / total_attendance) %>% 
  ggplot(aes(x = year, y = prop_gt_8hrs, fill = department_type)) +
  geom_col(position = "dodge")

# wait times more than 12 hours
waiting_times %>% 
  group_by(year, department_type) %>% 
  summarise(total_attendance = sum(total_attendance),
            wait_gt_12hrs = sum(wait_gt_12hrs),
            prop_gt_12hrs = wait_gt_12hrs / total_attendance) %>% 
  ggplot(aes(x = year, y = prop_gt_12hrs, fill = department_type)) +
  geom_col(position = "dodge")

```

# How do wait times change per year & month? (in terms of numbers)

```{r}
# wait times less than 4 hours
waiting_times %>% 
  group_by(year, month) %>% 
  summarise(wait_lt_4hrs = sum(wait_lt_4hrs)) %>% 
  ggplot(aes(x = month, y = wait_lt_4hrs, group = year, col = factor(year))) +
  geom_line()

# wait times more than 4 hours
waiting_times %>% 
  group_by(year, month) %>% 
  summarise(wait_gt_4hrs = sum(wait_gt_4hrs)) %>% 
  ggplot(aes(x = month, y = wait_gt_4hrs, group = year, col = factor(year))) +
  geom_line()

# wait times more than 8 hours
waiting_times %>% 
  group_by(year, month) %>% 
  summarise(wait_gt_8hrs = sum(wait_gt_8hrs)) %>% 
  ggplot(aes(x = month, y = wait_gt_8hrs, group = year, col = factor(year))) +
  geom_line()

# wait times more than 12 hours
waiting_times %>% 
  group_by(year, month) %>% 
  summarise(wait_gt_12hrs = sum(wait_gt_12hrs)) %>% 
  ggplot(aes(x = month, y = wait_gt_12hrs, group = year, col = factor(year))) +
  geom_line()

```

# How do wait times vary across health boards?

```{r}
# in terms of numbers
waiting_times %>% 
  group_by(date_ym, health_board) %>% 
  summarise(wait_gt_8hrs = sum(wait_gt_8hrs)) %>% 
  ggplot(aes(x = date_ym, y = wait_gt_8hrs, col = health_board)) +
  geom_line() +
  labs(title = "Number of waits > 8 hours for each healthboard")

# in terms of proportion
waiting_times_prop %>% 
  group_by(date_ym, health_board) %>% 
  summarise(prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/
              sum(total_attendance)) %>% 
  ggplot(aes(x = date_ym, y = prop_gt_8hrs, col = health_board)) +
  geom_line() +
  labs(title = "Proportion of waits > 8 hours for each healthboard")


# find top 5 healthboards (in terms of attendances)
top_5_attendances_by_hb <- waiting_times_prop %>% 
  group_by(health_board) %>% 
  summarise(total_att = sum(total_attendance)) %>% 
  slice_max(total_att, n = 5)

# find bottom 5 healthboards (in terms of attendances)
bottom_5_attendances_by_hb <- waiting_times_prop %>% 
  group_by(health_board) %>% 
  summarise(total_att = sum(total_attendance)) %>% 
  slice_min(total_att, n = 5)

# wait times across the top 5 healthboards > 8 hours
waiting_times_prop %>% 
  filter(health_board %in% top_5_attendances_by_hb$health_board) %>% 
  group_by(date_ym, health_board) %>% 
  summarise(prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/
              sum(total_attendance)) %>% 
  ggplot(aes(x = date_ym, y = prop_gt_8hrs, col = health_board)) +
  geom_line() +
  labs(title = "Proportion of waits > 8 hours for the top 5 most attended healthboards")

# wait times across the bottom 5 healthboards > 8 hours
waiting_times_prop %>% 
  filter(health_board %in% bottom_5_attendances_by_hb$health_board) %>% 
  group_by(date_ym, health_board) %>% 
  summarise(prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/
              sum(total_attendance)) %>% 
  ggplot(aes(x = date_ym, y = prop_gt_8hrs, col = health_board)) +
  geom_line() +
  labs(title = "Proportion of waits > 8 hours for the bottom 5 most attended healthboards")


# wait times across the top 5 healthboards > 12 hours
waiting_times_prop %>% 
  filter(health_board %in% top_5_attendances_by_hb$health_board) %>% 
  group_by(date_ym, health_board) %>% 
  summarise(prop_gt_12hrs = sum(total_attendance * prop_gt_12hrs)/
              sum(total_attendance)) %>% 
  ggplot(aes(x = date_ym, y = prop_gt_12hrs, col = health_board)) +
  geom_line() +
  labs(title = "Proportion of waits > 12 hours for the bottom 5 most attended healthboards")

# wait times across the bottom 5 healthboards > 12 hours
waiting_times_prop %>% 
  filter(health_board %in% bottom_5_attendances_by_hb$health_board) %>% 
  group_by(date_ym, health_board) %>% 
  summarise(prop_gt_12hrs = sum(total_attendance * prop_gt_12hrs)/
              sum(total_attendance)) %>% 
  ggplot(aes(x = date_ym, y = prop_gt_12hrs, col = health_board)) +
  geom_line() +
  labs(title = "Proportion of waits > 12 hours for the bottom 5 most attended healthboards")

```

```{r}

# wait times across each hospital in the Borders healthboard
waiting_times_prop %>% 
  # filter(health_board == "NHS Borders") %>% 
  group_by(date_ym, hospital_name) %>% 
  summarise(prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/
              sum(total_attendance)) %>% 
  ggplot(aes(x = date_ym, y = prop_gt_8hrs, col = hospital_name)) +
  geom_line() +
  labs(title = "Proportion of waits > 8 hours for the bottom 5 most attended healthboards") +
  theme(legend.position = "none")

```

# most affected healthboards
```{r}
# in terms of proportion
top_5_hospital_props <- waiting_times_prop %>% 
  group_by(hospital_name) %>% 
  summarise(max_prop = max(prop_gt_12hrs)) %>% 
  arrange(desc(max_prop)) %>% 
  slice_max(max_prop, n = 5)
  
# in terms of numbers
waiting_times %>% 
  group_by(hospital_name) %>% 
  summarise(max_prop = max(wait_gt_12hrs)) %>% 
  arrange(desc(max_prop))

```

```{r}

# wait times across each hospital in the Borders healthboard
waiting_times_prop %>% 
  filter(hospital_name %in% top_5_hospital_props$hospital_name) %>% 
  group_by(date_ym, hospital_name) %>% 
  summarise(prop_gt_12hrs = sum(total_attendance * prop_gt_12hrs)/
              sum(total_attendance)) %>% 
  ggplot(aes(x = date_ym, y = prop_gt_12hrs, col = hospital_name)) +
  geom_line() +
  labs(title = "Proportion of waits > 8 hours for the bottom 5 most attended healthboards")

```
## For the years 2007 to 2019, provide a summary of the effects of wait times at different times of the year.
```{r}

waiting_times_summary <- waiting_times_prop %>% 
  filter(year <= 2019) %>% 
  mutate(month = factor(month, levels = c("July", "August", "September", "October", "November", "December", "January", "February", "March", "April", "May", "June"))) %>% 
  group_by(month) %>% 
  summarise(total_attends = mean(total_attendance),
            prop_lt_4hrs = sum(total_attendance * prop_lt_4hrs)/ sum(total_attendance),
            prop_gt_4hrs = sum(total_attendance * prop_gt_4hrs)/ sum(total_attendance),
            prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/ sum(total_attendance),
            prop_gt_12hrs = sum(total_attendance * prop_gt_12hrs)/ sum(total_attendance)) 

waiting_times_summary %>% 
  ggplot(aes(x = month, y = total_attends)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title.x = element_blank()) +
  labs(y = "Average monthly attendances",
       title = "Average A&E attendances",
       subtitle = "From July 2007 to December 2019 ") +
  geom_smooth()

waiting_times_summary %>% 
  ggplot(aes(x = month, y = prop_lt_4hrs, group = 1)) +
  geom_point() +
  labs(title = "Seasonality of proportion of visits with a wait time < 4 hours",
         subtitle = "2007 to 2019") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_smooth()

waiting_times_summary %>% 
  ggplot(aes(x = month, y = prop_gt_4hrs, group = 1)) +
  geom_point() +
    labs(title = "Seasonality of proportion of visits with a wait time > 4 hours",
         subtitle = "2007 to 2019") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_smooth()


waiting_times_summary %>%
  ggplot(aes(x = month, y = prop_gt_8hrs, group = 1)) +
  geom_point() +
    labs(title = "Seasonality of proportion of visits with a wait time > 8 hours",
         subtitle = "2007 to 2019") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_smooth()


waiting_times_summary %>% 
  ggplot(aes(x = month, y = prop_gt_12hrs, group = 1)) +
  geom_point() +
    scale_y_continuous(labels = scales::percent) +
    labs(title = "Seasonality of proportion of visits with a wait time > 12 hours",
         subtitle = "Pre-covid: 2007 to 2019",
         y = "Percentage of attendances > 12 hours") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title.x = element_blank()) +
  geom_smooth(se = FALSE)




```
The number of A&E attendances decrease during the winter months however the proportion of wait times greater than 4hours increases significantly between November and February highlighting that the complexity of cases in the winter mean that it takes hospitals longer to discharge patients. Perhaps combining this output with the bed occupation rates during the winter months could further supplement this relationship. 


```{r}

waiting_times_prop %>% 
  # filter(year <= 2019) %>% 
  mutate(month = factor(month, levels = c("July", "August", "September", "October", "November", "December", "January", "February", "March", "April", "May", "June"))) %>% 
  group_by(month, health_board) %>% 
  summarise(total_attends = mean(total_attendance),
            prop_lt_4hrs = sum(total_attendance * prop_lt_4hrs)/ sum(total_attendance),
            prop_gt_4hrs = sum(total_attendance * prop_gt_4hrs)/ sum(total_attendance),
            prop_gt_8hrs = sum(total_attendance * prop_gt_8hrs)/ sum(total_attendance),
            prop_gt_12hrs = sum(total_attendance * prop_gt_12hrs)/ sum(total_attendance)) %>% 
  ggplot(aes(x = month, y = prop_gt_12hrs, group = health_board, colour = health_board)) +
  geom_line() +
  labs(title = "Seasonality of proportion of visits with a wait time > 12 hours") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```


# How do attendances to different specialties change over time?

```{r}

specialties_raw <- read_csv(here("raw_data/non_covid/inpatient_and_daycase_by_nhs_board_of_treatment_and_specialty.csv")) %>%
  clean_names()

specialties_raw

```

```{r}

specialties <- specialties_raw %>% 
  inner_join(hb, "hb") %>% 
  inner_join(hospitals, "location") %>% 
  select(-ends_with("qf"), -hb, - specialty) %>% 
  select(quarter, hb_name, location, location_name, everything()) %>% 
  mutate(year = as.numeric(str_sub(quarter,1, 4)), .before = quarter) %>% 
  mutate(is_covid_year = case_when(
    year >= 2020 ~ TRUE,
    year < 2020 ~ FALSE
  ))
  # mutate(quarter = str_sub(quarter, -2, -1))

specialties

```

# all episodes vs time
```{r}

specialties %>% 
  distinct(admission_type)

specialties %>% 
  group_by(quarter) %>% 
  filter(admission_type == "All Inpatients and Day cases") %>% 
  summarise(total_episodes = sum(episodes)) %>% 
  ggplot(aes(x = quarter, y = total_episodes, group = 1)) +
  geom_line() +
  geom_point() +
  theme_classic() +
  scale_y_continuous(labels = scales::comma, expand = c(0,0), limits = c(0,NA)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Total admissions for All Inpatients and Day Cases",
       x = "Quarter",
       y = "Total Episodes")

```

# All Day Cases

```{r}

specialties %>% 
  distinct(admission_type)

specialties %>% 
  group_by(quarter, admission_type) %>% 
  filter(admission_type %in% c("All Day cases", "All Inpatients")) %>% 
  summarise(total_episodes = sum(episodes)) %>% 
  ggplot(aes(x = quarter, y = total_episodes, colour = admission_type)) +
  geom_line(aes(group = admission_type)) +
  geom_point() +
  theme_classic() +
  scale_y_continuous(labels = scales::comma, expand = c(0,0), limits = c(0,NA)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Total admissions for All Day Cases and All Inpatients",
       x = "Quarter",
       y = "Total Episodes",
       col = "Admission Type")


```

```{r}

all_inpatient_cats <- c("Elective Inpatients",
                          "Emergency Inpatients",
                          "Transfers")

specialties %>% 
  group_by(quarter, admission_type) %>% 
  filter(admission_type %in% all_inpatient_cats) %>% 
  summarise(total_episodes = sum(episodes)) %>% 
  ggplot(aes(x = quarter, y = total_episodes, colour = admission_type)) +
  geom_line(aes(group = admission_type)) +
  geom_point() +
  theme_classic() +
  scale_y_continuous(labels = scales::comma, expand = c(0,0), limits = c(0,NA)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Total admissions for Inpatients",
       x = "Quarter",
       y = "Total Episodes",
       col = "Admission Type")

```

```{r}

specialties %>% 
  distinct(admission_type, specialty_name)

```

```{r}

specialties %>% 
  group_by(quarter, admission_type, specialty_name) %>% 
  filter(admission_type == "Emergency Inpatients") %>% 
  summarise(total_episodes = sum(episodes)) %>% 
  ggplot(aes(x = quarter, y = total_episodes, colour = specialty_name)) +
  geom_line(aes(group = specialty_name)) +
  geom_point() +
  theme_classic() +
  scale_y_continuous(labels = scales::comma, expand = c(0,0), limits = c(0,10000)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Total admissions for Emergency Inpatients",
       subtitle = "Split by specialty",
       x = "Quarter",
       y = "Total Episodes",
       col = "Admission Type")

```

# Which departments changed the most between Covid and pre-covid
```{r}

total_admissions <- specialties %>% 
  group_by(is_covid_year) %>% 
  filter(admission_type == "All Inpatients and Day cases") %>% 
  summarise(total_admissions = sum(episodes)) %>% 
  pull(total_admissions)

change_in_at_specialties <- specialties %>% 
  filter(admission_type %in% c(all_inpatient_cats, "All Day cases")) %>% 
  group_by(admission_type, specialty_name, is_covid_year) %>% 
  summarise(total_episodes = sum(episodes)) %>% 
  pivot_wider(names_from = is_covid_year, values_from = total_episodes) %>% 
  rename("covid_year" = "TRUE", "pre_covid_year" = "FALSE") %>% 
  mutate(pre_covid_year_prop = pre_covid_year / total_admissions[1],
         covid_year_prop = covid_year / total_admissions[2],
         percentage_change = ((covid_year_prop / pre_covid_year_prop) - 1) * 100) %>% 
  ungroup()


```



```{r}

specialties %>% 
  filter(admission_type == "All Day cases" & specialty_name == "Cardiothoracic Surgery") %>% 
  group_by(is_covid_year) %>% 
  summarise(count = sum(episodes))

```

```{r}
# top 5 changes in specialty proportion
change_in_at_specialties %>% 
  slice_max(percentage_change, n = 5)

# sort by largest proportion, top 5
change_in_at_specialties %>% 
  slice_max(percentage_change, n = 5) %>% 
  arrange(desc(covid_year_prop))

```

```{r}

change_in_at_specialties %>% 
  filter(covid_year > 1000) %>% 
  slice_max(percentage_change, n = 5) %>% 
  mutate(label = str_c(specialty_name, "\n(", admission_type, ")")) %>% 
  ggplot(aes(x = reorder(label, sort(percentage_change)),
                         y = percentage_change)) +
  geom_col() +
  theme_classic() +
  scale_y_continuous(expand = c(0,0)) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(size = 8)) +
  labs(y = "Percentage Increase (%)",
       title = "Top 5 increases in hospital admissions (by specialty and admission type)",
       subtitle = "More than 1,000 admissions")

```

```{r}

change_in_specialties <- specialties %>% 
  group_by(specialty_name, is_covid_year) %>% 
  summarise(total_episodes = sum(episodes)) %>% 
  pivot_wider(names_from = is_covid_year, values_from = total_episodes) %>% 
  rename("covid_year" = "TRUE", "pre_covid_year" = "FALSE") %>% 
  mutate(pre_covid_year_prop = pre_covid_year / total_admissions[1],
         covid_year_prop = covid_year / total_admissions[2],
         percentage_change = ((covid_year_prop / pre_covid_year_prop) - 1) * 100) %>% 
  ungroup()

change_in_specialties %>% 
  slice_max(percentage_change, n = 5) %>% 
  filter(percentage_change > 0) %>% 
  arrange(desc(percentage_change)) %>% 
  ggplot(aes(x = reorder(specialty_name, percentage_change, decreasing = TRUE),
             y = percentage_change)) +
  geom_col() +
  theme_classic() +
  scale_y_continuous(expand = c(0,0)) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(size = 8)) +
  labs(y = "Percentage Increase (%)",
       title = "Top 5 changes in hospital admissions (by specialty) - pre-Covid vs Covid",
       subtitle = "More than 1,000 admissions")

```

```{r}


wt_with_discharges <- waiting_times_raw %>% 
  mutate(date_ym = ym(month), .before = month,
         month = month(date_ym, label = TRUE, abbr = FALSE),
         year = year(date_ym)) %>% 
  left_join(hb, c("hbt" = "hb")) %>% 
  left_join(hospitals, c("treatment_location" = "location")) %>% 
  rename(total_attendance = number_of_attendances_aggregate,
         wait_lt_4hrs = number_meeting_target_aggregate,
         wait_gt_8hrs = attendance_greater8hrs,
         wait_gt_12hrs = attendance_greater12hrs,
         health_board = hb_name,
         hospital_id = treatment_location, 
         hospital_name = location_name) %>% 
  select(-ends_with("qf")) %>% 
  select(date_ym, year, month, health_board, hospital_id, hospital_name,
         everything(), -country, - hbt)

# check if there is a trend across all years
wt_with_discharges %>% 
  group_by(date_ym, hospital_name) %>% 
  summarise(transfers = sum(discharge_destination_transfer, na.rm = TRUE)) %>% 
  ggplot(aes(x = date_ym, y = transfers, col = hospital_name)) +
  geom_line() +
  theme(legend.position = "none")

# check if there is a seasonal trend
wt_with_discharges %>% 
  mutate(month = factor(month, levels = c("July", "August", "September", "October", "November", "December", "January", "February", "March", "April", "May", "June"))) %>% 
  group_by(month, health_board) %>% 
  summarise(transfers = sum(discharge_destination_other_specialty, na.rm = TRUE)) %>% 
  ggplot(aes(x = month, y = transfers, group = health_board, col = health_board)) +
  geom_line() +
  theme(legend.position = "none") +
  facet_wrap(~ health_board, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

```

# Bed occupancy data

```{r}

beds_raw <- read_csv(here("raw_data/non_covid/beds_by_nhs_board_of_treatment_and_specialty.csv")) %>% 
  clean_names() %>% 
  select(-ends_with("qf")) 

```

```{r}
# all specialty names
beds_raw %>% 
  group_by(specialty_name) %>% 
  summarise(sum(all_staffed_beddays))

beds_raw %>% 
  filter(specialty_name != "All Acute" & specialty_name != "All Specialties") %>% 
  # group_by(specialty_name) %>% 
  summarise(sum(all_staffed_beddays))
# 209139660


beds <- beds_raw %>% 
  left_join(hb, "hb") %>% 
  left_join(hospitals, "location") %>% 
  select(quarter, hb, hb_name, location, location_name, specialty_name:percentage_occupancy) %>% 
  mutate()



```

```{r}

beds %>% 
  group_by(quarter) %>% 
  summarise(staffed_beds = sum(all_staffed_beddays),
            occupied_beds = sum(total_occupied_beddays), 
            occupation_percentage = occupied_beds / staffed_beds * 100) %>% 
  ggplot(aes(x = quarter, y = occupation_percentage, group = 1)) +
  geom_line()

```

# Wait times - baseline (pre-covid vs covid)

## Percentage of wait times within target (<4hrs) 

```{r}

# pre-covid <= 2019, covid => 2020

avg_wt_target_pre_covid <- waiting_times %>% 
  filter(year <= 2019) %>% 
  group_by(month) %>% 
  summarise(total_attendance = sum(total_attendance),
            total_wait_lt_4hrs = sum(wait_lt_4hrs),
            target_wait_prop = total_wait_lt_4hrs / total_attendance)

avg_wt_target_pre_covid %>% 
  ggplot(aes(x = month, y = target_wait_prop, group = 1)) +
  geom_line() +
  geom_smooth(se = 0)

waiting_times %>% 
  filter(year >= 2020) %>%
  group_by(year, month) %>% 
  summarise(total_attendance = sum(total_attendance),
            total_wait_lt_4hrs = sum(wait_lt_4hrs),
            target_wait_prop = total_wait_lt_4hrs / total_attendance) %>% 
  ggplot(aes(x = month, y = target_wait_prop,
                group = factor(year), col = factor(year))) +
  geom_line() +
  geom_point() +
  geom_line(data = avg_wt_target_pre_covid, aes(x = month, y = target_wait_prop,
                                                group = 1, colour = "Baseline"),
              size = 1) +
  geom_point(data = avg_wt_target_pre_covid, aes(x = month, y = target_wait_prop,
                                                group = 1, colour = "Baseline")) +
  theme_classic() +
  scale_color_manual(name = "Year", values = c("Baseline" = "darkblue",
                                               "2020" = "red",
                                               "2021" = "green",
                                               "2022" = "orange")) +
  scale_y_continuous(labels = scales::percent) +
  labs(y = "Percentage of admissions (%)",
       title = "Proportion of waiting times greater than 12 hours",
       subtitle = "Pre-covid (2007-2019) vs covid (2020+)",
       col = "Year") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))


```

## Percentage of wait times >8hrs 

```{r}

avg_wt_gt_8hrs_pre_covid <- waiting_times %>% 
  filter(year <= 2019) %>% 
  group_by(month) %>% 
  summarise(total_attendance = sum(total_attendance),
            total_wait_gt_8hrs = sum(wait_gt_8hrs),
            target_wait_prop = total_wait_gt_8hrs / total_attendance)

avg_wt_gt_8hrs_pre_covid %>% 
  ggplot(aes(x = month, y = target_wait_prop, group = 1)) +
  geom_line() +
  geom_smooth(se = 0)

waiting_times %>% 
  filter(year >= 2020) %>%
  group_by(year, month) %>% 
  summarise(total_attendance = sum(total_attendance),
            total_wait_gt_8hrs = sum(wait_gt_8hrs),
            target_wait_prop = total_wait_gt_8hrs / total_attendance) %>% 
  ggplot(aes(x = month, y = target_wait_prop,
                group = factor(year), col = factor(year))) +
  geom_line() +
  geom_point() +
  geom_line(data = avg_wt_gt_8hrs_pre_covid, aes(x = month, y = target_wait_prop,
                                                group = 1, colour = "Baseline"),
              size = 1) +
    geom_point(data = avg_wt_gt_8hrs_pre_covid, aes(x = month, y = target_wait_prop,
                                                group = 1, colour = "Baseline")) +
  theme_classic() +
  scale_color_manual(name = "Year", values = c("Baseline" = "darkblue", "2020" = "red", "2021" = "green", "2022" = "orange")) +
  scale_y_continuous(labels = scales::percent) +
  labs(y = "Percentage of admissions (%)",
       title = "Proportion of waiting times greater than 8 hours",
       subtitle = "Pre-covid (2007-2019) vs covid (2020+)",
       col = "Year") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

```

## Percentage of wait times >12hrs 

```{r}

avg_wt_gt_12hrs_pre_covid <- waiting_times %>% 
  filter(year <= 2019) %>% 
  group_by(month) %>% 
  summarise(total_attendance = sum(total_attendance),
            total_wait_gt_12hrs = sum(wait_gt_12hrs),
            target_wait_prop = total_wait_gt_12hrs / total_attendance)

avg_wt_gt_12hrs_pre_covid %>% 
  ggplot(aes(x = month, y = target_wait_prop, group = 1)) +
  geom_line() +
  geom_smooth(se = 0)

waiting_times %>% 
  filter(year >= 2020) %>%
  group_by(year, month) %>% 
  summarise(total_attendance = sum(total_attendance),
            total_wait_gt_12hrs = sum(wait_gt_12hrs),
            target_wait_prop = total_wait_gt_12hrs / total_attendance) %>% 
  ggplot(aes(x = month, y = target_wait_prop,
                group = factor(year), col = factor(year))) +
  geom_line() +
  geom_point() +
  geom_line(data = avg_wt_gt_12hrs_pre_covid, aes(x = month, y = target_wait_prop,
                                                group = 1, colour = "Baseline"),
              size = 1) +
    geom_point(data = avg_wt_gt_12hrs_pre_covid, aes(x = month, y = target_wait_prop,
                                                group = 1, colour = "Baseline")) +
  theme_classic() +
  scale_color_manual(name = "Year", values = c("Baseline" = "darkblue",
                                               "2020" = "red",
                                               "2021" = "green",
                                               "2022" = "orange")) +
  scale_y_continuous(labels = scales::percent) +
  labs(y = "Percentage of admissions (%)",
       title = "Proportion of waiting times greater than 12 hours",
       subtitle = "Pre-covid (2007-2019) vs covid (2020+)",
       col = "Year") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

```


# Jack's question:

```{r}

beds_treatment_specialty <- beds


df1 <- beds_treatment_specialty %>% #pre covid dates, all health baords
  mutate(date = yq(quarter), month = month(date, label = TRUE, abbr = TRUE),
         year = year(date)) %>% 
  filter(year <= 2019, hb == "S08000015") %>% # hb selected by user input
  group_by(month) %>% 
  summarise(avg_occupancy = mean(percentage_occupancy, na.rm = TRUE))


df2 <- beds_treatment_specialty %>% 
  mutate(date = yq(quarter), month = month(date, label = TRUE, abbr = TRUE),
         year = year(date)) %>% 
  filter(year > 2019, hb == "S08000015") %>% #post covid, filter based on map input
  group_by(year, month) %>% 
  summarise(avg_occupancy = mean(percentage_occupancy, na.rm = TRUE))

## no longer required
# df3 <- df1 %>% 
#   mutate(Type = "baseline") %>% 
#   bind_rows(df2 %>% 
#               mutate(Type = "post-covid"))

df2 %>% 
  ggplot(aes(x = month, y = avg_occupancy, colour = factor(year), group = year)) +
  geom_point() +
  geom_line() +
  geom_line(data = df1, aes(x = month, y = avg_occupancy, group = 1, col = "Baseline")) +
  geom_point(data = df1, aes(x = month, y = avg_occupancy, group = 1, col = "Baseline")) +
  scale_color_manual(name = "Year", values = c("Baseline" = "darkblue",
                                               "2020" = "red",
                                               "2021" = "green")) +
  labs(title = "Avg Occupancy by Health Board vs Baseline", # need to update these
       x = "Month",
       y = "Avg Occupancy Rate")


df2_wide <- df2 %>% 
  pivot_wider(names_from = year, values_from = avg_occupancy)

df12 <- df1 %>% 
  rename(baseline = avg_occupancy) %>% 
  left_join(df2_wide, "month")

df12
```

```{r}

df3 <- beds_treatment_specialty %>% #pre covid dates, all health baords
  mutate(date = yq(quarter), month = month(date, label = TRUE, abbr = TRUE),
         year = year(date)) %>% 
  filter(year <= 2019, hb == "S08000015") %>% # hb selected by user input
  group_by(month) %>% 
  summarise(total_available = sum(all_staffed_beddays), 
            total_occupied = sum(total_occupied_beddays),
            avg_occupancy = total_occupied / total_available)


df4 <- beds_treatment_specialty %>% 
  mutate(date = yq(quarter), month = month(date, label = TRUE, abbr = TRUE),
         year = year(date)) %>% 
  filter(year > 2019, hb == "S08000015") %>% #post covid, filter based on map input
  group_by(year, month) %>% 
  summarise(total_available = sum(all_staffed_beddays), 
            total_occupied = sum(total_occupied_beddays),
            avg_occupancy = total_occupied / total_available)

df4 %>% 
  ggplot(aes(x = month, y = avg_occupancy, colour = factor(year), group = year)) +
  geom_point() +
  geom_line() +
  geom_line(data = df3, aes(x = month, y = avg_occupancy, group = 1, col = "Baseline")) +
  geom_point(data = df3, aes(x = month, y = avg_occupancy, group = 1, col = "Baseline")) +
  scale_color_manual(name = "Year", values = c("Baseline" = "darkblue",
                                               "2020" = "red",
                                               "2021" = "green")) +
  labs(title = "Avg Occupancy by Health Board vs Baseline", # need to update these
       x = "Month",
       y = "Avg Occupancy Rate") +
  scale_y_continuous(labels = scales::percent)


df4_wide <- df2 %>% 
  pivot_wider(names_from = year, values_from = avg_occupancy)

df34 <- df3 %>% 
  rename(baseline = avg_occupancy) %>% 
  left_join(df2_wide, "month") %>% 
  select(-total_available, - total_occupied) %>% 
  mutate(baseline = baseline * 100)

```

```{r}
# weighted average occupancy rate
df34

```

```{r}
# average of occupancy percentages
df12

```
# how does bed usage change over time
```{r}

beds %>%
  group_by(quarter) %>%
  summarise(beds_available = sum(all_staffed_beddays),
            beds_occupied = sum(total_occupied_beddays)) %>%
  pivot_longer(beds_available:beds_occupied, names_to = "bed_type",
               values_to = "num_beds") %>% 
  ggplot(aes(x = quarter, y = num_beds, colour = bed_type, group = bed_type)) +
  geom_line() +
  scale_y_continuous(labels = scales::comma) +
  theme_classic() +
  theme(axis.text = element_text(angle = 45, hjust = 1),
        axis.title.x = element_blank()) +
  labs(y = "Number of bed days",
       title = "Number of beds available/occupied over time",
       col = "Bed Type")

```
# bed use by specialty
```{r}

beds %>%
  group_by(quarter, specialty_name) %>%
  # filter(specialty_name %in% c("Clinical Radiology", "Intensive Care Medicine")) %>% 
  summarise(beds_available = sum(all_staffed_beddays),
            beds_occupied = sum(total_occupied_beddays)) %>%
  pivot_longer(beds_available:beds_occupied, names_to = "bed_type",
               values_to = "num_beds") %>% 
  ggplot(aes(x = quarter, y = num_beds, colour = specialty_name, group = specialty_name)) +
  geom_line() +
  facet_wrap(~bed_type) +
  scale_y_continuous(labels = scales::comma) +
  theme_classic() +
  theme(axis.text = element_text(angle = 45, hjust = 1),
        axis.title.x = element_blank()) +
  labs(y = "Number of bed days",
       title = "Number of beds available/occupied over time",
       col = "Bed Type")

```

```{r}

beds %>% 
  mutate(year = as.numeric(str_extract(quarter, "[0-9]{4}")),
    is_covid_year = case_when(
    year <= 2019 ~ FALSE,
    year >= 2020 ~ TRUE
    )) %>% 
  group_by(specialty_name, is_covid_year) %>% 
  summarise(beds_available = sum(all_staffed_beddays),
            beds_occupied = sum(total_occupied_beddays)) %>% 
  select(specialty_name, is_covid_year, beds_occupied) %>%  
  pivot_wider(names_from = is_covid_year, values_from = beds_occupied) %>% 
  rename("pre_covid" = "FALSE", "covid" = "TRUE") %>% 
  drop_na() %>% 
  mutate(diff_occupied = covid - pre_covid,
         diff_occupied_pc = ((covid / pre_covid) - 1) * 100) %>% 
  filter(diff_occupied_pc > 0) %>% 
  arrange(desc(diff_occupied_pc))
  

```
# waiting times
## cleaning
```{r}

hb <- read_csv(here("clean_data/hb_list_simple.csv"))

waiting_times <- waiting_times_raw %>% 
  mutate(date_ym = ym(month), .before = month,
         month = month(date_ym, label = TRUE, abbr = FALSE),
         year = year(date_ym)) %>% 
  left_join(hb, c("hbt" = "hb")) %>% 
  left_join(hospitals, c("treatment_location" = "location")) %>% 
  rename(total_attendance = number_of_attendances_aggregate,
         wait_lt_4hrs = number_meeting_target_aggregate,
         wait_gt_8hrs = attendance_greater8hrs,
         wait_gt_12hrs = attendance_greater12hrs,
         hospital_id = treatment_location, 
         hospital_name = location_name) %>% 
  select(date_ym, year, month, hb_name, hospital_id, hospital_name, department_type,
         total_attendance, wait_lt_4hrs, wait_gt_8hrs, wait_gt_12hrs) %>% 
  mutate(wait_gt_4hrs = total_attendance - wait_lt_4hrs, .after = wait_lt_4hrs) %>%
  mutate(across(total_attendance:wait_gt_12hrs, .fns = ~coalesce(., 0)))

```

## analysis
```{r}

waiting_times %>% 
  mutate(is_covid_year = case_when(
    year <= 2019 ~ FALSE,
    year >= 2020 ~ TRUE
  )) %>% 
  group_by(is_covid_year) %>% 
  summarise(sum_attendance = sum(total_attendance), 
            wait_target = sum(wait_lt_4hrs)) %>% 
  pivot_longer(wait_target, names_to = "wait_time", values_to = "value") %>% 
  mutate(proportion = value / sum_attendance) %>% 
  ggplot(aes(ymax = proportion, ymin = 0, xmax = 2, xmin = 1, fill = proportion)) +
  geom_rect(aes(ymax = 1, ymin = 0, xmax = 2, xmin = 1), fill = "grey80") +
  geom_rect() + 
  coord_polar(theta = "y",start=-pi/2) + xlim(c(0, 2)) + ylim(c(0,2)) +
  geom_text(aes(x = 0, y = 0, label = scales::percent(proportion, accuracy = 0.1)), size = 6.5) +
  geom_text(aes(x = 0.5, y = 1.5), label = c("Pre-Covid", "During Covid"), family="Poppins Light", size=4.2) + 
  facet_wrap(~is_covid_year, nrow = 1) +
  theme_void() +
  # scale_fill_manual(values = c("red"="#C9146C", "orange"="#DA9112", "green"="#129188")) +
  scale_colour_manual(values = c("red"="#C9146C", "orange"="#DA9112", "green"="#129188")) +
  theme(strip.background = element_blank(),
        strip.text.x = element_blank(),
        legend.position = "none") +
# ,
#          +
  labs(title = "   Percentage of admissions achieving target wait times (<4hrs)")


```


```{r}

library(ggforce)
library(scales)

waiting_times %>% 
  mutate(is_covid_year = case_when(
    year <= 2019 ~ FALSE,
    year >= 2020 ~ TRUE
  )) %>% 
  group_by(is_covid_year) %>% 
  summarise(sum_attendance = sum(total_attendance), 
            wait_target = sum(wait_lt_4hrs)) %>% 
  pivot_longer(wait_target, names_to = "wait_time", values_to = "value") %>% 
  mutate(proportion = value / sum_attendance) %>%
  mutate(ymin = rescale(0, to = pi*c(-.5,.5), from = 0:1), 
         ymax = rescale(proportion, to = pi*c(-.5,.5), from = 0:1)) %>%
  ggplot(aes(x0 = 0, y0 = 0, r0 = .5, r = 1)) + 
  geom_arc_bar(aes(start = - pi / 2, end = pi / 2), fill = "grey80") +
  geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = .5, r = 1, start = ymin, end = ymax, fill = proportion)) +
  coord_fixed() +
  facet_wrap(~ is_covid_year) +
  ylim(-0.3, 1) +
  geom_text(aes(x = 0, y = 0.01, label = scales::percent(proportion, accuracy = 0.1)), size = 6.5) +
  geom_text(aes(x = 0, y = -0.25), label = c("Pre-Covid", "During Covid"), family= "Poppins Light", size=4.2) +
  theme_void() +
    theme(strip.background = element_blank(),
          strip.text = element_blank(),
        legend.position = "none",
        title = element_text(vjust = 1),
          plot.margin = unit(c(0, 0, 0, 0), "cm")) +
  labs(title = "   Percentage of admissions achieving target wait times (<4hrs)\n")

  

```

# Colour palette

```{r}

pal <- c(rgb(199, 175, 117, maxColorValue = 255),
         rgb(124, 36, 24, maxColorValue = 255), 
         rgb(210, 221, 213, maxColorValue = 255), 
         rgb(168, 106, 57, maxColorValue = 255), 
         rgb(222, 224, 227, maxColorValue = 255),
         rgb(186, 158, 53, maxColorValue = 255), 
         rgb(6, 57, 83, maxColorValue = 255), 
         rgb(109, 67, 85, maxColorValue = 255)
)

show_col(pal)



```

# Rob's problem 

```{r}



 inpatient_and_daycase_by_nhs_board_of_treatment_and_simd_non_covid_cleaned %>%
  select(quarter_year, year, hb_name, simd, admission_type, stays, is_covid_year) %>%
  filter(stays >0) %>%
  filter(hb_name != "") %>%
  filter(!is.na(simd)) %>%
  group_by(is_covid_year, stays, quarter_year, simd) %>%
  summarise(total_stays = sum(stays)) %>%
  distinct() %>%
  ggplot()+
  aes(x = quarter_year, y = total_stays, fill = simd)+
  geom_col(position = "fill")

```
```{r}
library(plotly)
df <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv")
df$hover <- with(df, paste(state, '<br>', "Beef", beef, "Dairy", dairy, "<br>",
                           "Fruits", total.fruits, "Veggies", total.veggies,
                           "<br>", "Wheat", wheat, "Corn", corn))

fig <- plot_geo(df, locationmode = 'USA-states')
fig <- fig %>% add_trace(
    z = ~total.exports, text = ~hover, locations = ~code,
    color = ~total.exports, colors = 'Purples'
  )
fig <- fig %>% colorbar(title = "Millions USD")
fig <- fig %>% layout(
    title = '2011 US Agriculture Exports by State<br>(Hover for breakdown)'
  )

fig

```

```{r}

health_board_map %>%
  plot_geo(locationmode = "Scotland") %>% 
  add_trace(text = ~hb_name)
      ggplot(aes(text = hb_name)) +
      geom_sf(fill = pal[5], col = "gray40", size = 0.1) +
      geom_sf(data = health_board_map %>% filter(hb_name %in% input$health_board_input),
              fill = pal[7], size = 0.1, colour = "white") +
      theme_void()
    
    ggplotly(p,
    tooltip = "text") %>% 
      config(scrollZoom = TRUE,
             displayModeBar = F,
             showAxisDragHandles = F)

```

